Before we create Metacells from the downsampled scRNA-seq dataset, we need to filter out cells and genes based on QC metrics. This is to avoid making outliers even more pronounced in the dataset once we start merging cells into Metacells.

1 Loading the dataset

Load libraries.

library(Seurat)
library(SeuratObject)
library(dplyr)
library(tidyverse)

Load the single cell seurat object.

se <- readRDS("vignette_output/se_allen_brain_atlas_250_cells.RDS")
se
## An object of class Seurat 
## 31053 features across 10172 samples within 1 assay 
## Active assay: RNA (31053 features, 0 variable features)

2 Cells before filtering

Check number of cells per annotated class (subclass_label).

table(se$subclass_label)
## 
##           Astro        CA1-ProS       CA2-IG-FC             CA3            Car3 
##             250             250             250             250             250 
##              CR          CT SUB              DG            Endo      L2 IT ENTl 
##             250             250             250             250             250 
##      L2 IT ENTm     L2/3 IT CTX    L2/3 IT ENTl     L2/3 IT PPP     L2/3 IT RHP 
##             250             250             250             250             250 
##       L3 IT ENT      L4 RSP-ACA     L4/5 IT CTX       L5 IT CTX          L5 PPP 
##             250             250             250             250             250 
##       L5 PT CTX L5/6 IT TPE-ENT     L5/6 NP CTX       L6 CT CTX       L6 IT CTX 
##             250             250             250             250             250 
##      L6 IT ENTl         L6b CTX      L6b/CT ENT           Lamp5           Meis2 
##             250             250             250             250              20 
##       Micro-PVM          NP PPP          NP SUB           Oligo           Pvalb 
##             250             250             250             250             250 
##        SMC-Peri            Sncg             Sst       Sst Chodl        SUB-ProS 
##             250             250             250             250             250 
##             Vip            VLMC 
##             250             152

3 QC metrics before filtering

Collect QC metrics for mitochondrial and ribosomal genes.

se[["percent.mt"]] <- PercentageFeatureSet(se, pattern = "^mt-")
se[["percent.rb"]] <- PercentageFeatureSet(se, pattern = "^Rpl|^Rps")

Plot QC metrics:

VlnPlot(se, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), ncol = 1, group.by = "subclass_label")

RidgePlot(se, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), ncol = 1, group.by = "subclass_label")
## Picking joint bandwidth of 252
## Picking joint bandwidth of 1240
## Picking joint bandwidth of 0.344
## Picking joint bandwidth of 0.285

4 Filtering

Filtering cells based on number of genes (nFeature_RNA), UMIs (nCount_RNA), mitochondrial-% (percent.mt) and robosomal-% (percent.rb).

# Seurat object before filtering
se
## An object of class Seurat 
## 31053 features across 10172 samples within 1 assay 
## Active assay: RNA (31053 features, 0 variable features)
# Filtering
se <- subset(se, subset = nFeature_RNA > 200 & nFeature_RNA < 8000 & 
                    nCount_RNA > 500 & nCount_RNA < 35000 & 
                    percent.mt <= 10 & percent.rb <= 15)

# Seurat object after filtering
se
## An object of class Seurat 
## 31053 features across 9930 samples within 1 assay 
## Active assay: RNA (31053 features, 0 variable features)

Filtering out low abundance genes.

# Seurat object before filtering
se
## An object of class Seurat 
## 31053 features across 9930 samples within 1 assay 
## Active assay: RNA (31053 features, 0 variable features)
# Define a threshold for the total UMI count
min_umi <- 100

# Calculate the total UMIs per gene
gene_totals <- Matrix::rowSums(GetAssayData(se, slot = "counts"))

# Subset genes with total UMIs above the threshold
se <- subset(se, features = rownames(se)[gene_totals >= min_umi])

# Seurat object after filtering
se
## An object of class Seurat 
## 15353 features across 9930 samples within 1 assay 
## Active assay: RNA (15353 features, 0 variable features)

5 QC metrics after filtering

Plot QC metrics:

VlnPlot(se, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), ncol = 1, group.by = "subclass_label")

RidgePlot(se, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), ncol = 1, group.by = "subclass_label")
## Picking joint bandwidth of 247
## Picking joint bandwidth of 1210
## Picking joint bandwidth of 0.313
## Picking joint bandwidth of 0.282

6 Cells after filtering

Inspect how many cells are left after filtering. Take note if some cell types are disproportionately affected by your filtering settings. Note that when creating Metacells, all annotated classes with less than 50 will be excluded. In this case, Meis2.

# Check number of cells per annotated class after filtering
table(se$subclass_label)
## 
##           Astro        CA1-ProS       CA2-IG-FC             CA3            Car3 
##             216             250             248             248             246 
##              CR          CT SUB              DG            Endo      L2 IT ENTl 
##             246             249             250             246             250 
##      L2 IT ENTm     L2/3 IT CTX    L2/3 IT ENTl     L2/3 IT PPP     L2/3 IT RHP 
##             250             249             248             248             249 
##       L3 IT ENT      L4 RSP-ACA     L4/5 IT CTX       L5 IT CTX          L5 PPP 
##             250             232             250             249             248 
##       L5 PT CTX L5/6 IT TPE-ENT     L5/6 NP CTX       L6 CT CTX       L6 IT CTX 
##             237             244             250             249             249 
##      L6 IT ENTl         L6b CTX      L6b/CT ENT           Lamp5           Meis2 
##             250             246             249             250              19 
##       Micro-PVM          NP PPP          NP SUB           Oligo           Pvalb 
##             211             250             250             235             243 
##        SMC-Peri            Sncg             Sst       Sst Chodl        SUB-ProS 
##             204             246             250             248             247 
##             Vip            VLMC 
##             245             136

Save filtered Seurat object.

saveRDS(se,file = "vignette_output/se_250_cells_filtered.RDS")
Click to show Session Info
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.2      stringr_1.4.1      purrr_1.0.1        readr_2.1.3       
##  [5] tidyr_1.2.1        tibble_3.2.1       ggplot2_3.3.6      tidyverse_1.3.2   
##  [9] dplyr_1.1.4        SeuratObject_4.1.3 Seurat_4.3.0      
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.1           backports_1.4.1        plyr_1.8.7            
##   [4] igraph_1.3.5           lazyeval_0.2.2         sp_1.5-0              
##   [7] splines_4.2.1          listenv_0.8.0          scattermore_0.8       
##  [10] digest_0.6.30          htmltools_0.5.3        fansi_1.0.3           
##  [13] magrittr_2.0.3         tensor_1.5             googlesheets4_1.0.1   
##  [16] cluster_2.1.3          ROCR_1.0-11            tzdb_0.3.0            
##  [19] globals_0.16.1         modelr_0.1.9           matrixStats_0.62.0    
##  [22] timechange_0.2.0       spatstat.sparse_3.0-0  colorspace_2.0-3      
##  [25] rvest_1.0.3            ggrepel_0.9.1          haven_2.5.1           
##  [28] xfun_0.34              crayon_1.5.2           jsonlite_1.8.7        
##  [31] progressr_0.11.0       spatstat.data_3.0-0    survival_3.3-1        
##  [34] zoo_1.8-11             glue_1.6.2             polyclip_1.10-4       
##  [37] gtable_0.3.1           gargle_1.2.1           leiden_0.4.3          
##  [40] future.apply_1.9.1     abind_1.4-5            scales_1.2.1          
##  [43] DBI_1.1.3              spatstat.random_3.1-2  miniUI_0.1.1.1        
##  [46] Rcpp_1.0.9             viridisLite_0.4.1      xtable_1.8-4          
##  [49] reticulate_1.26        htmlwidgets_1.5.4      httr_1.4.6            
##  [52] RColorBrewer_1.1-3     ellipsis_0.3.2         ica_1.0-3             
##  [55] farver_2.1.1           pkgconfig_2.0.3        sass_0.4.2            
##  [58] uwot_0.1.14            dbplyr_2.2.1           deldir_1.0-6          
##  [61] utf8_1.2.2             labeling_0.4.2         tidyselect_1.2.0      
##  [64] rlang_1.1.2            reshape2_1.4.4         later_1.3.0           
##  [67] munsell_0.5.0          cellranger_1.1.0       tools_4.2.1           
##  [70] cachem_1.0.6           cli_3.4.1              generics_0.1.3        
##  [73] broom_1.0.1            ggridges_0.5.4         evaluate_0.17         
##  [76] fastmap_1.1.0          yaml_2.3.6             goftest_1.2-3         
##  [79] knitr_1.40             fs_1.5.2               fitdistrplus_1.1-8    
##  [82] RANN_2.6.1             pbapply_1.5-0          future_1.28.0         
##  [85] nlme_3.1-157           mime_0.12              xml2_1.3.3            
##  [88] compiler_4.2.1         rstudioapi_0.14        plotly_4.10.0         
##  [91] png_0.1-7              spatstat.utils_3.1-0   reprex_2.0.2          
##  [94] bslib_0.4.0            stringi_1.7.8          highr_0.9             
##  [97] lattice_0.20-45        Matrix_1.5-1           vctrs_0.6.4           
## [100] pillar_1.9.0           lifecycle_1.0.3        spatstat.geom_3.0-5   
## [103] lmtest_0.9-40          jquerylib_0.1.4        RcppAnnoy_0.0.19      
## [106] data.table_1.14.4      cowplot_1.1.1          irlba_2.3.5.1         
## [109] httpuv_1.6.6           patchwork_1.1.2        R6_2.5.1              
## [112] promises_1.2.0.1       KernSmooth_2.23-20     gridExtra_2.3         
## [115] parallelly_1.32.1      codetools_0.2-18       MASS_7.3-60           
## [118] assertthat_0.2.1       withr_2.5.0            sctransform_0.3.5     
## [121] parallel_4.2.1         hms_1.1.2              grid_4.2.1            
## [124] rmarkdown_2.17         googledrive_2.0.0      Rtsne_0.16            
## [127] spatstat.explore_3.0-5 shiny_1.7.2            lubridate_1.9.3